
import urllib.request
from lxml import etree

# Fetch the Baidu homepage and extract the label of its search button.
url = 'https://www.baidu.com/'

# Request headers sent so the request resembles one from a desktop browser.
# NOTE(review): the Cookie below looks like a captured browser session that is
# hard-coded in source; consider loading it from configuration or the
# environment instead of committing it — confirm whether it is still needed.
headers = {
    'Cookie': 'BIDUPSID=E04D67B5F25C419FF5CAC9517D0526B4; PSTM=1733134135; BAIDUID=E04D67B5F25C419F6775388B1B95BE63:FG=1; BD_UPN=12314753; H_WISE_SIDS_BFESS=60278_61027_60853_61610_61693_61780_61823_61844_61987; BAIDUID_BFESS=E04D67B5F25C419F6775388B1B95BE63:FG=1; BA_HECTOR=a181800g042k81210h2k05ag2nbdre1jp1jqb1v; Hm_lvt_aec699bb6442ba076c8981c6dc490771=1736987101,1737193278,1737460267,1737543515; ZFY=aur9HrpWYrn7Jw8b8ogzBa:AcXNMsyOna:AgftxeHR2dA:C; B64_BOT=1; baikeVisitId=b013de5f-a221-42f3-8f22-8ea63adf36d5; COOKIE_SESSION=498_0_9_8_10_7_1_0_8_5_38_0_489_0_4_0_1737548927_0_1737548923%7C9%2377433_15_1736991585%7C5; H_PS_PSSID=60278_61027_60853_61693_61780_61823_61844_61987; H_WISE_SIDS=60278_61027_60853_61693_61780_61823_61844_61987; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BD_HOME=1',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}

# Build the request object carrying the custom headers.
request = urllib.request.Request(url=url, headers=headers)

# Send the request to the web server. The context manager closes the
# connection once the body has been read, and the timeout (in seconds) keeps
# the script from blocking forever on a stalled connection.
with urllib.request.urlopen(request, timeout=10) as response:
    # Read the raw response body and decode it into the page source text.
    content = response.read().decode('utf-8')
# print(content)

# Optionally save the page source to a file:
# with open('./download/baidu.html', 'w', encoding='utf-8') as f:
#     f.write(content)


# Parse the server's response text into an element tree with etree.HTML.
tree = etree.HTML(content)

# Extract the wanted data: the `value` attribute of the <input id="su">
# element (the "百度一下" search button). The XPath result is printed as-is.
my_data = tree.xpath("//input[@id='su']/@value")
print(my_data)











